#!/usr/bin/env perl

############################################################################           
#  Licensed to the Apache Software Foundation (ASF) under one or more                  
#  contributor license agreements.  See the NOTICE file distributed with               
#  this work for additional information regarding copyright ownership.                 
#  The ASF licenses this file to You under the Apache License, Version 2.0             
#  (the "License"); you may not use this file except in compliance with                
#  the License.  You may obtain a copy of the License at                               
#                                                                                      
#      http://www.apache.org/licenses/LICENSE-2.0                                      
#                                                                                      
#  Unless required by applicable law or agreed to in writing, software                 
#  distributed under the License is distributed on an "AS IS" BASIS,                   
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.            
#  See the License for the specific language governing permissions and                 
#  limitations under the License.                                                      
                                                                                       
###############################################################################
# Nightly tests for pig.
#
#

#PigSetup::setup();


# Negative-test configuration: every test below runs a Pig script that is
# expected to fail, and names the error text the failure should produce.
#
# Structure, as visible in this file:
#   driver       - name of the harness driver ('Pig').
#   nummachines  - NOTE(review): presumably the number of machines the harness
#                  should use; confirm against the harness.
#   groups       - list of test groups, each with a 'name' and a 'tests' list.
#
# Keys used by individual tests:
#   num                  - test number, unique within its group.
#   pig                  - text of the Pig script to run.
#   expected_err_regex   - pattern the error output is expected to match.
#   expected_err_regex23 - NOTE(review): looks like an alternative pattern for
#                          a specific Hadoop version (0.23?); confirm.
#   execonly             - 'mapred' or 'local'; presumably restricts the test
#                          to that execution mode -- confirm against harness.
#   ignore               - set to 1 on tests whose inline comment says they
#                          are no longer applicable; presumably skipped.
#
# Tokens such as :INPATH:, :OUTPATH:, :FUNCPATH: and :SCRIPTHOMEPATH: appear to
# be placeholders that the harness substitutes before running the script
# (TODO confirm).  Script bodies are whitespace-sensitive string literals, so
# they are deliberately not re-indented.
#
# NOTE(review): $cfg is intentionally left as an undeclared package variable;
# the code that loads this file is not visible here and may depend on it.
$cfg = {
    'driver'      => 'Pig',
    'nummachines' => 5,

    'groups' => [
        {
            'name'  => 'HadoopError',
            'tests' => [
                {
                    'num' => 1,
                    'pig' => q\
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, COUNT($5);
dump c;\,
                    'expected_err_regex' => "Out of bound access. Trying to access non-existent column: 5.",
                },
            ]
        },
        {
            'name'  => 'NoSuchFile',
            'tests' => [
                {
                    'num' => 1,
                    'pig' => q\a = load '/user/gates/nosuchfile'; dump a;\,
                    'expected_err_regex' => "ERROR 2118: Input path does not exist",
                },
                {
                    'num' => 2,
                    'pig' => q\register bla.jar\,
                    'expected_err_regex' => "ERROR 101: file 'bla.jar' does not exist.",
                }
            ]
        },
        {
            'name'  => 'BadFunc',
            'tests' => [
                { # PIG-431
                    'num' => 1,
                    'pig' => "a = load ':INPATH:/singlefile/studenttab10k' using NoSuchFunction(':');",
                    'expected_err_regex' => "Could not resolve NoSuchFunction using imports",
                },
            ]
        },
        {
            'name'  => 'FileExists',
            'tests' => [
                {
                    'num' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
store a into ':INPATH:/singlefile/fileexists';\,
                    'expected_err_regex' => ".* already exists",
                },
            ]
        },
        {
            'name'  => 'NegForeach',
            'tests' => [
                {
                    'num' => 1,
                    'ignore' => 1, # it is valid now
                    # testing that nested foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = group a by name;
                        c = foreach b { ba = filter a by age < '25'; bb = foreach ba generate gpa; generate group, flatten(bb);}\,
                    'expected_err_regex' => " Encountered \" \"foreach\" \"foreach \"\" at line.*\nWas expecting one of:",
                },
                {
                    'num' => 2,
                    # testing that group within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = group a by name;
                        c = foreach b { ba = filter a by age < '25'; g = group ba by gpa; x = foreach g generate group, flatten(g); generate x;}\,
                    'expected_err_regex' => "Syntax error, unexpected symbol at or near 'ba'",
                },
                {
                    'num' => 3,
                    # testing that cogroup within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
                        c = cogroup a by (name, age), b by (name, age) ;
                        d = foreach c { e = cogroup a by (name), b by (name); x= foreach a generate flatten(a), flatten(b); generate x;}\,
                    'expected_err_regex' => "mismatched input 'a' expecting LEFT_PAREN",
                },
                {
                    'num' => 4,
                    # testing that join within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
                        c = cogroup a by (name), b by (name) ;
                        d = foreach c { e = join a by (age), b by (age); generate e;}\,
                    'expected_err_regex' => "mismatched input 'a' expecting LEFT_PAREN",
                },
                {
                    'num' => 5,
                    'ignore' => 1, # it is valid now
                    # testing that cross within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
                        c = cogroup a by (name), b by (name) ;
                        d = foreach c { e = cross a,b; generate e;}\,
                    'expected_err_regex' => "Encountered \" \"cross\" \"cross \"\" at .*\nWas expecting one of",
                },
                {
                    'num' => 6,
                    # testing that union within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
                        c = cogroup a by (name), b by (name) ;
                        d = foreach c { e = union a,b; generate e;}\,
                    'expected_err_regex' => "mismatched input 'a' expecting LEFT_PAREN",
                },
                {
                    'num' => 7,
                    # testing that split within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = group a by name;
                        c = foreach b { split a into ba if age < '25', bb if age > '40'; generate group, COUNT(ba), COUNT(bb);}\,
                    'expected_err_regex' => "Syntax error, unexpected symbol at or near 'split'",
                },
                {
                    'num' => 8,
                    # testing that load within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = group a by name;
                        c = foreach b { 
                                d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); 
                                generate *;}\,
                    'expected_err_regex' => "mismatched input ''.*/singlefile/votertab10k'' expecting LEFT_PAREN",
                },
                {
                    'num' => 9,
                    # testing that store within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = group a by name;
                        c = foreach b { store a into ':OUTPATH:'; generate *;}\,
                    'expected_err_regex' => "Syntax error, unexpected symbol at or near 'store'",
                },
            ]
        },
        {
            # test not allowed operations
            'name'  => 'NotAllowed',
            'tests' => [
                {
                    # currently (as of 09/18/2008), the following are not allowed
                    #a = b;
                    'num' => 1,
                    'ignore' => 1, # different error message for different version of hadoop
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = a;\,
                    'expected_err_regex' => "mismatched input ';' expecting LEFT_PAREN",
                },
                {
                    # currently (as of 09/18/2008), the following are not allowed
                    #a = b as (x,y,z);
                    'num' => 2,
                    'ignore' => 1, # different error message for different version of hadoop
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = a as (x,y,z);\,
                    'expected_err_regex' => "mismatched input 'as' expecting LEFT_PAREN",
                },
            ]
        },
        {
            # test with udf which throws exception
            'name'  => 'UdfException',
            'tests' => [
                {
                    'num' => 1,
                    'pig' => q\
                        register :FUNCPATH:/testudf.jar;
                        a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        c = cogroup a by name, b by name;
                        d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf(a,b));
                        store d into ':OUTPATH:';\,
                    'expected_err_regex' => "Out of bounds access",
                    'expected_err_regex23' => "Unable to recreate exception",
                },
                {
                    'num' => 2,
                    'pig' => q\
                        register :FUNCPATH:/testudf.jar;
                        a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        c = cogroup a by name, b by name;
                        d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf2(a,b));
                        store d into ':OUTPATH:';\,
                    'expected_err_regex' => "Out of bounds access",
                    'expected_err_regex23' => "Unable to recreate exception",
                },
                {
                    'num' => 3,
                    'pig' => q\
                        register :FUNCPATH:/testudf.jar;
                        a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        c = cogroup a by name, b by name;
                        d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf3(a,b));
                        store d into ':OUTPATH:';\,
                    'expected_err_regex' => "Out of bounds access",
                    'expected_err_regex23' => "Unable to recreate exception",
                },
                {
                    'num' => 4,
                    'pig' => q\
                        register :FUNCPATH:/testudf.jar;
                        a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                        c = cogroup a by name, b by name;
                        d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf4(a,b));
                        store d into ':OUTPATH:';\,
                    'expected_err_regex' => "ERROR 2078: .*",
                    'expected_err_regex23' => "Unable to recreate exception",
                },
            ]
        },
        {
            # test for syntax errors
            'name'  => 'SyntaxErrors',
            'tests' => [
                {
                    # missing quotes around command
                    'num' => 1,
                    'pig' => q#
A = load ':INPATH:/singlefile/studenttab10k';
B = foreach A generate $2, $1, $0;
C = stream B through awk 'BEGIN {FS = "\t"; OFS = "\t"} {print $3, $2, $1}';
dump C;#,
                    #'expected_err_regex' => "mismatched input ''BEGIN {FS = \"\\t\"; OFS = \"\\t\"} {print \$"."3, \$"."2, \$"."1}'' expecting SEMI_COLON",
                    # The literal left braces are escaped: an unescaped "{" after
                    # literal text is a fatal pattern error on Perl 5.26 and a
                    # warning on later releases.  "\{" matches the same text on
                    # every Perl version.
                    'expected_err_regex' => "mismatched input ''BEGIN \\{.*} \\{.*}'' expecting SEMI_COLON",
                },
                {
                    # input spec missing parenthesis
                    'num' => 2,
                    'pig' => q#
define CMD `perl PigStreaming.pl foo -` input 'foo' using PigStorage() ship(':SCRIPTHOMEPATH:/PigStreaming.pl');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;#,
                    'expected_err_regex' => "mismatched input ''foo'' expecting LEFT_PAREN",
                },
                {
                    # no serializer name after using
                    'num' => 3,
                    'pig' => q#
define CMD `perl PigStreaming.pl foo -` output ('foo' using );
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;#,
                    'expected_err_regex' => "Syntax error, unexpected symbol at or near '\\)'",
                },
                {
                    # alias name missing from define
                    'num' => 4,
                    'pig' => q#
define `perl PigStreaming.pl foo -`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;#,
                    'expected_err_regex' => "mismatched input '`perl PigStreaming.pl foo -`' expecting IDENTIFIER_L",
                },
                {
                    # quotes missing from name of the file in ship script
                    'num' => 5,
                    'pig' => q#
define CMD `perl PigStreaming.pl foo -` ship(:SCRIPTHOMEPATH:/PigStreaming.pl);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;#,
                    'expected_err_regex' => "mismatched input '.' expecting RIGHT_PAREN",
                },
            ]
        },
        {
            # relevant information missing/incorrect in streaming command
            'name'  => 'CmdErrors',
            'tests' => [
                {
                    # Define uses non-existent command (autoship)
                    'num' => 1,
                    'execonly' => 'mapred',
                    'pig' => q\
define CMD `perl PigStreamingNotThere.pl`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
                {
                    # Define uses non-existent command with ship clause
                    'num' => 2,
                    'pig' => q\
define CMD `perl PigStreamingNotThere.pl foo -` ship(':SCRIPTHOMEPATH:/PigStreamingNotThere.pl');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "Invalid ship specification: .*/PigStreamingNotThere.pl.* does not exist!",
                },
                {
                    # Define uses non-existent command with cache clause
                    'num' => 3,
                    'pig' => q\
define CMD `perl PigStreaming.pl - - nameMap` ship(':SCRIPTHOMEPATH:/PigStreaming.pl') cache(':SCRIPTHOMEPATH:/PigStreamingNotThere.pl#NotThere');
A = load ':INPATH:/singlefile/studenttab10k';
B = foreach A generate $0;
C = stream B through CMD as (name);
D = group C by name;
E = foreach D generate group, COUNT(C);
dump E;\,
                    'expected_err_regex' => "Invalid cache specification: .*/PigStreamingNotThere.pl.*",
                },
                {
                    # Define uses non-existent serializer
                    'num' => 4,
                    'pig' => q\
define CMD `perl PigStreaming.pl foo -` input('foo' using SerializerNotThere()) ship(':SCRIPTHOMEPATH:/PigStreaming.pl');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "Could not resolve SerializerNotThere using imports",
                },
                {
                    # Define uses non-existent deserializer
                    'num' => 5,
                    'pig' => q\
define CMD `perl PigStreaming.pl` output(stdout using DeserializerNotThere()) ship(':SCRIPTHOMEPATH:/PigStreaming.pl');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "Could not resolve DeserializerNotThere using imports",
                },
                {
                    # Invalid skip path
                    'num' => 6,
                    'pig' => q\
set stream.skippath 'foo';
define CMD `perl PigStreaming.pl`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "Invalid value for stream.skippath.*",
                },
                {
                    # Invalid command alias in stream operator
                    'num' => 7,
                    'pig' => q\
define CMD `perl PigStreaming.pl`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD1;
dump B;\,
                    'expected_err_regex' => "pig script failed to validate: Undefined command-alias \\[CMD1\\]",
                },
                {
                    # Invalid operator alias in stream operator
                    'num' => 8,
                    'pig' => q\
define CMD `perl PigStreaming.pl`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream C through CMD;
dump B;\,
                    'expected_err_regex' => "Undefined alias: C",
                },
            ]
        },
        {
            # streaming application failures
            'name'  => 'StreamingErrors',
            'tests' => [
                {
                    # Streaming application fails in the beginning of processing
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 1,
                    'execonly' => 'mapred',
                    'pig' => q\
define CMD `perl PigStreamingBad.pl start` ship(':SCRIPTHOMEPATH:/PigStreamingBad.pl')  stderr('CMD' limit 1);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 1",
                },
                {
                    # Streaming application fails in the middle of processing
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 2,
                    'execonly' => 'mapred',
                    'pig' => q\
define CMD `perl PigStreamingBad.pl middle` ship(':SCRIPTHOMEPATH:/PigStreamingBad.pl')  stderr('CMD' limit 1); 
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
                {
                    # Streaming application fails in the end of processing
                    # bring logs to dfs
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 3,
                    'execonly' => 'mapred',
                    'pig' => q\
define CMD `perl PigStreamingBad.pl end` ship(':SCRIPTHOMEPATH:/PigStreamingBad.pl') stderr('CMD' limit 1);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 3",
                },
                {
                    # Streaming application fails randomly during processing
                    # bring logs to dfs
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 4,
                    'execonly' => 'mapred',
                    'pig' => q\
define CMD `perl DieRandomly.pl 10000 2` ship(':SCRIPTHOMEPATH:/DieRandomly.pl') stderr('CMD' limit 1);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
            ]
        },
        {
            # errors (de)serializing data for streaming
            'name'  => 'IOErrors',
            'tests' => [
                {
                    # Invalid deserializer - throws exception
                    'num' => 1,
                    'execonly' => 'mapred',
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
define CMD `perl PigStreaming.pl` input(stdin) output(stdout using org.apache.pig.test.udf.streaming.DumpStreamerBad) ship(':SCRIPTHOMEPATH:/PigStreaming.pl');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "Error reading output from Streaming binary",
                },
                {
                    # Invalid serializer - throws exception
                    'num' => 2,
                    'execonly' => 'mapred',
                    'pig' => q\
define CMD `perl PigStreamingDepend.pl` input(stdin using StringStoreBad) ship(':SCRIPTHOMEPATH:/PigStreamingDepend.pl', ':SCRIPTHOMEPATH:/PigStreamingModule.pm');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through `perl PigStreaming.pl`;
C = stream B through CMD as (name, age, gpa);
D = foreach C generate name, age;
store D into ':OUTPATH:';\,
                    'expected_err_regex' => "Could not resolve StringStoreBad using imports",
                },
            ]
        },
        {
            'name'  => 'StreamingLocalErrors',
            'tests' => [
                {
                    # Define uses non-existent command
                    'num' => 1,
                    'execonly' => 'local',
                    'pig' => q\
define CMD `perl PigStreamingNotThere.pl`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "Can't open perl script .*PigStreamingNotThere.pl.*: No such file or directory",
                },
                {
                    # Streaming application fails in the beginning of processing
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 2,
                    'execonly' => 'local',
                    'pig' => q\
define CMD `perl :SCRIPTHOMEPATH:/PigStreamingBad.pl start`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 1",
                },
                {
                    # Streaming application fails in the middle of processing
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 3,
                    'execonly' => 'local',
                    'pig' => q\
define CMD `perl :SCRIPTHOMEPATH:/PigStreamingBad.pl middle`; 
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
                {
                    # Streaming application fails in the end of processing
                    # (the script is invoked with the 'end' argument)
                    # bring logs to dfs
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 4,
                    'execonly' => 'local',
                    'pig' => q\
define CMD `perl :SCRIPTHOMEPATH:/PigStreamingBad.pl end`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 3",
                },
                {
                    # Invalid deserializer - throws exception
                    'num' => 5,
                    'execonly' => 'local',
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl` input(stdin using PigDump) output(stdout using org.apache.pig.test.udf.storefunc.DumpLoaderBad);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "Could not resolve PigDump using imports",
                },
                {
                    # Invalid serializer - throws exception
                    'num' => 6,
                    'execonly' => 'local',
                    'pig' => q\
define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using StringStoreBad);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through `perl PigStreaming.pl`;
C = stream B through CMD as (name, age, gpa);
D = foreach C generate name, age;
store D into ':OUTPATH:';\,
                    'expected_err_regex' => "Could not resolve StringStoreBad using imports",
                },
            ]
        },
        {
            'name'  => 'LineageErrors',
            'tests' => [
                {
                    # UDF returns a bytearray that is cast to an integer
                    'num' => 1,
                    'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by name lt 'b';
c = foreach b generate org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
d = foreach c generate $0#'alice young';
split d into e if $0 < 42, f if $0 >= 42;
store e into ':OUTPATH:';\,
                    'expected_err_regex' => "Received a bytearray from the UDF. Cannot determine how to convert the bytearray",
                },
            ]
        }
    ]
};



